# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import pycountry
%matplotlib inline
#offline- mode
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode()
# Importing high-level chart objects
import plotly.graph_objs as go
# Import tools
from plotly import tools
import cufflinks as cf
# Load the dataset. decimal=',' tells pandas to treat ',' as the decimal mark
# when parsing numeric fields.
df = pd.read_csv('C://Users/Akara/Downloads/countries of the world.csv',decimal=',')
# Preview the first ten rows.
df.head(10)
| Country | Region | Population | Area (sq. mi.) | Pop. Density (per sq. mi.) | Coastline (coast/area ratio) | Net migration | Infant mortality (per 1000 births) | GDP ($ per capita) | Literacy (%) | Phones (per 1000) | Arable (%) | Crops (%) | Other (%) | Climate | Birthrate | Deathrate | Agriculture | Industry | Service | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | ASIA (EX. NEAR EAST) | 31056997 | 647500 | 48.0 | 0.00 | 23.06 | 163.07 | 700.0 | 36.0 | 3.2 | 12.13 | 0.22 | 87.65 | 1.0 | 46.60 | 20.34 | 0.380 | 0.240 | 0.380 |
| 1 | Albania | EASTERN EUROPE | 3581655 | 28748 | 124.6 | 1.26 | -4.93 | 21.52 | 4500.0 | 86.5 | 71.2 | 21.09 | 4.42 | 74.49 | 3.0 | 15.11 | 5.22 | 0.232 | 0.188 | 0.579 |
| 2 | Algeria | NORTHERN AFRICA | 32930091 | 2381740 | 13.8 | 0.04 | -0.39 | 31.00 | 6000.0 | 70.0 | 78.1 | 3.22 | 0.25 | 96.53 | 1.0 | 17.14 | 4.61 | 0.101 | 0.600 | 0.298 |
| 3 | American Samoa | OCEANIA | 57794 | 199 | 290.4 | 58.29 | -20.71 | 9.27 | 8000.0 | 97.0 | 259.5 | 10.00 | 15.00 | 75.00 | 2.0 | 22.46 | 3.27 | NaN | NaN | NaN |
| 4 | Andorra | WESTERN EUROPE | 71201 | 468 | 152.1 | 0.00 | 6.60 | 4.05 | 19000.0 | 100.0 | 497.2 | 2.22 | 0.00 | 97.78 | 3.0 | 8.71 | 6.25 | NaN | NaN | NaN |
| 5 | Angola | SUB-SAHARAN AFRICA | 12127071 | 1246700 | 9.7 | 0.13 | 0.00 | 191.19 | 1900.0 | 42.0 | 7.8 | 2.41 | 0.24 | 97.35 | NaN | 45.11 | 24.20 | 0.096 | 0.658 | 0.246 |
| 6 | Anguilla | LATIN AMER. & CARIB | 13477 | 102 | 132.1 | 59.80 | 10.76 | 21.03 | 8600.0 | 95.0 | 460.0 | 0.00 | 0.00 | 100.00 | 2.0 | 14.17 | 5.34 | 0.040 | 0.180 | 0.780 |
| 7 | Antigua & Barbuda | LATIN AMER. & CARIB | 69108 | 443 | 156.0 | 34.54 | -6.15 | 19.46 | 11000.0 | 89.0 | 549.9 | 18.18 | 4.55 | 77.27 | 2.0 | 16.93 | 5.37 | 0.038 | 0.220 | 0.743 |
| 8 | Argentina | LATIN AMER. & CARIB | 39921833 | 2766890 | 14.4 | 0.18 | 0.61 | 15.18 | 11200.0 | 97.1 | 220.4 | 12.31 | 0.48 | 87.21 | 3.0 | 16.73 | 7.55 | 0.095 | 0.358 | 0.547 |
| 9 | Armenia | C.W. OF IND. STATES | 2976372 | 29800 | 99.9 | 0.00 | -6.47 | 23.28 | 3500.0 | 98.6 | 195.7 | 17.55 | 2.30 | 80.15 | 4.0 | 12.07 | 8.23 | 0.239 | 0.343 | 0.418 |
# Keep every column from position 2 onward, i.e. leave out the two leading
# text columns ('Country' and 'Region').
df2 = df.iloc[:,2:]
df2
| Population | Area (sq. mi.) | Pop. Density (per sq. mi.) | Coastline (coast/area ratio) | Net migration | Infant mortality (per 1000 births) | GDP ($ per capita) | Literacy (%) | Phones (per 1000) | Arable (%) | Crops (%) | Other (%) | Climate | Birthrate | Deathrate | Agriculture | Industry | Service | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 31056997 | 647500 | 48.0 | 0.00 | 23.06 | 163.07 | 700.0 | 36.0 | 3.2 | 12.13 | 0.22 | 87.65 | 1.0 | 46.60 | 20.34 | 0.380 | 0.240 | 0.380 |
| 1 | 3581655 | 28748 | 124.6 | 1.26 | -4.93 | 21.52 | 4500.0 | 86.5 | 71.2 | 21.09 | 4.42 | 74.49 | 3.0 | 15.11 | 5.22 | 0.232 | 0.188 | 0.579 |
| 2 | 32930091 | 2381740 | 13.8 | 0.04 | -0.39 | 31.00 | 6000.0 | 70.0 | 78.1 | 3.22 | 0.25 | 96.53 | 1.0 | 17.14 | 4.61 | 0.101 | 0.600 | 0.298 |
| 3 | 57794 | 199 | 290.4 | 58.29 | -20.71 | 9.27 | 8000.0 | 97.0 | 259.5 | 10.00 | 15.00 | 75.00 | 2.0 | 22.46 | 3.27 | NaN | NaN | NaN |
| 4 | 71201 | 468 | 152.1 | 0.00 | 6.60 | 4.05 | 19000.0 | 100.0 | 497.2 | 2.22 | 0.00 | 97.78 | 3.0 | 8.71 | 6.25 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 222 | 2460492 | 5860 | 419.9 | 0.00 | 2.98 | 19.62 | 800.0 | NaN | 145.2 | 16.90 | 18.97 | 64.13 | 3.0 | 31.67 | 3.92 | 0.090 | 0.280 | 0.630 |
| 223 | 273008 | 266000 | 1.0 | 0.42 | NaN | NaN | NaN | NaN | NaN | 0.02 | 0.00 | 99.98 | 1.0 | NaN | NaN | NaN | NaN | 0.400 |
| 224 | 21456188 | 527970 | 40.6 | 0.36 | 0.00 | 61.50 | 800.0 | 50.2 | 37.2 | 2.78 | 0.24 | 96.98 | 1.0 | 42.89 | 8.30 | 0.135 | 0.472 | 0.393 |
| 225 | 11502010 | 752614 | 15.3 | 0.00 | 0.00 | 88.29 | 800.0 | 80.6 | 8.2 | 7.08 | 0.03 | 92.90 | 2.0 | 41.00 | 19.93 | 0.220 | 0.290 | 0.489 |
| 226 | 12236805 | 390580 | 31.3 | 0.00 | 0.00 | 67.69 | 1900.0 | 90.7 | 26.8 | 8.32 | 0.34 | 91.34 | 2.0 | 28.01 | 21.84 | 0.179 | 0.243 | 0.579 |
227 rows × 18 columns
# Report the dimensions of the full frame.
row_count, column_count = df.shape
print(f'no of rows:{row_count}', f'no of columns:{column_count}')
no of rows:227 no of columns:20
df.isnull().sum()
Country 0 Region 0 Population 0 Area (sq. mi.) 0 Pop. Density (per sq. mi.) 0 Coastline (coast/area ratio) 0 Net migration 3 Infant mortality (per 1000 births) 3 GDP ($ per capita) 1 Literacy (%) 18 Phones (per 1000) 4 Arable (%) 2 Crops (%) 2 Other (%) 2 Climate 22 Birthrate 3 Deathrate 4 Agriculture 15 Industry 16 Service 15 dtype: int64
df.columns
Index(['Country', 'Region', 'Population', 'Area (sq. mi.)',
'Pop. Density (per sq. mi.)', 'Coastline (coast/area ratio)',
'Net migration', 'Infant mortality (per 1000 births)',
'GDP ($ per capita)', 'Literacy (%)', 'Phones (per 1000)', 'Arable (%)',
'Crops (%)', 'Other (%)', 'Climate', 'Birthrate', 'Deathrate',
'Agriculture', 'Industry', 'Service'],
dtype='object')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 227 entries, 0 to 226 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 227 non-null object 1 Region 227 non-null object 2 Population 227 non-null int64 3 Area (sq. mi.) 227 non-null int64 4 Pop. Density (per sq. mi.) 227 non-null float64 5 Coastline (coast/area ratio) 227 non-null float64 6 Net migration 224 non-null float64 7 Infant mortality (per 1000 births) 224 non-null float64 8 GDP ($ per capita) 226 non-null float64 9 Literacy (%) 209 non-null float64 10 Phones (per 1000) 223 non-null float64 11 Arable (%) 225 non-null float64 12 Crops (%) 225 non-null float64 13 Other (%) 225 non-null float64 14 Climate 205 non-null float64 15 Birthrate 224 non-null float64 16 Deathrate 223 non-null float64 17 Agriculture 212 non-null float64 18 Industry 211 non-null float64 19 Service 212 non-null float64 dtypes: float64(16), int64(2), object(2) memory usage: 35.6+ KB
## Create an ISO alpha-3 code for each country, for easy use in visualizations
def do_fuzzy_search(country):
    """Return the alpha-3 code of the best fuzzy match for a country name.

    Args:
        country: Country name to look up with pycountry's fuzzy search.

    Returns:
        The ``alpha_3`` attribute of the first search result, or ``np.nan``
        when ``country`` is not a string or no match is found.
    """
    # A missing or non-text value cannot be searched; report it as "no code".
    if not isinstance(country, str):
        return np.nan
    try:
        matches = pycountry.countries.search_fuzzy(country)
    except LookupError:
        # pycountry raises LookupError when nothing matches. Catch only that,
        # so unrelated failures (typos, interrupts) are not silently hidden.
        return np.nan
    return matches[0].alpha_3
# Look up a country code for every name in the 'Country' column.
df["country_code"] = df["Country"].apply(do_fuzzy_search)
# TO KNOW THE MISSING VALUE PERCENT
print("missing percent is:")
def missing_percent(df):
    """Return the percentage of missing values per column, sorted ascending.

    Columns without any missing value are left out of the result.
    """
    null_counts = df.isnull().sum()
    percent_missing = 100 * (null_counts / len(df))
    has_missing = percent_missing > 0
    return percent_missing[has_missing].sort_values()
missing_percent(df)
missing percent is:
GDP ($ per capita) 0.440529 Arable (%) 0.881057 Crops (%) 0.881057 Other (%) 0.881057 Net migration 1.321586 Infant mortality (per 1000 births) 1.321586 Birthrate 1.321586 Phones (per 1000) 1.762115 Deathrate 1.762115 Agriculture 6.607930 Service 6.607930 Industry 7.048458 Literacy (%) 7.929515 Climate 9.691630 country_code 11.453744 dtype: float64
# Numeric columns that contain missing values; each gap is replaced by the
# mean of its own column.
cols = df[['Net migration', 'Infant mortality (per 1000 births)',
           'GDP ($ per capita)', 'Literacy (%)', 'Phones (per 1000)', 'Arable (%)',
           'Crops (%)', 'Other (%)', 'Climate', 'Birthrate', 'Deathrate',
           'Agriculture', 'Industry', 'Service']]
# Assign the filled columns back instead of calling
# df[col].fillna(..., inplace=True): that chained in-place form acts on a
# temporary object under pandas Copy-on-Write and then leaves df unchanged.
df[cols.columns] = cols.fillna(cols.mean())
df.isnull().sum()
Country 0 Region 0 Population 0 Area (sq. mi.) 0 Pop. Density (per sq. mi.) 0 Coastline (coast/area ratio) 0 Net migration 0 Infant mortality (per 1000 births) 0 GDP ($ per capita) 0 Literacy (%) 0 Phones (per 1000) 0 Arable (%) 0 Crops (%) 0 Other (%) 0 Climate 0 Birthrate 0 Deathrate 0 Agriculture 0 Industry 0 Service 0 country_code 26 dtype: int64
# Short, identifier-friendly names for the columns used in the rest of the
# analysis.
column_aliases = {
    'Literacy (%)': 'Literacy',
    'Arable (%)': 'Arable',
    'Crops (%)': 'Crops',
    'Other (%)': 'Other_factors',
    'Pop. Density (per sq. mi.)': 'Population_density',
    'Coastline (coast/area ratio)': 'Coastline',
    'Area (sq. mi.)': 'Area_land',
    'Infant mortality (per 1000 births)': 'Infant_mortality',
    'GDP ($ per capita)': 'GDP_PCP',
    'Phones (per 1000)': 'Phones',
}
df.rename(columns=column_aliases, inplace=True)
# Display only: round() returns a rounded copy, df itself is not modified.
df.round(4)
| Country | Region | Population | Area_land | Population_density | Coastline | Net migration | Infant_mortality | GDP_PCP | Literacy | ... | Arable | Crops | Other_factors | Climate | Birthrate | Deathrate | Agriculture | Industry | Service | country_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | ASIA (EX. NEAR EAST) | 31056997 | 647500 | 48.0 | 0.00 | 23.0600 | 163.070 | 700.000 | 36.0000 | ... | 12.13 | 0.22 | 87.65 | 1.0 | 46.6000 | 20.3400 | 0.3800 | 0.2400 | 0.3800 | AFG |
| 1 | Albania | EASTERN EUROPE | 3581655 | 28748 | 124.6 | 1.26 | -4.9300 | 21.520 | 4500.000 | 86.5000 | ... | 21.09 | 4.42 | 74.49 | 3.0 | 15.1100 | 5.2200 | 0.2320 | 0.1880 | 0.5790 | ALB |
| 2 | Algeria | NORTHERN AFRICA | 32930091 | 2381740 | 13.8 | 0.04 | -0.3900 | 31.000 | 6000.000 | 70.0000 | ... | 3.22 | 0.25 | 96.53 | 1.0 | 17.1400 | 4.6100 | 0.1010 | 0.6000 | 0.2980 | DZA |
| 3 | American Samoa | OCEANIA | 57794 | 199 | 290.4 | 58.29 | -20.7100 | 9.270 | 8000.000 | 97.0000 | ... | 10.00 | 15.00 | 75.00 | 2.0 | 22.4600 | 3.2700 | 0.1508 | 0.2827 | 0.5653 | ASM |
| 4 | Andorra | WESTERN EUROPE | 71201 | 468 | 152.1 | 0.00 | 6.6000 | 4.050 | 19000.000 | 100.0000 | ... | 2.22 | 0.00 | 97.78 | 3.0 | 8.7100 | 6.2500 | 0.1508 | 0.2827 | 0.5653 | AND |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 222 | West Bank | NEAR EAST | 2460492 | 5860 | 419.9 | 0.00 | 2.9800 | 19.620 | 800.000 | 82.8383 | ... | 16.90 | 18.97 | 64.13 | 3.0 | 31.6700 | 3.9200 | 0.0900 | 0.2800 | 0.6300 | NaN |
| 223 | Western Sahara | NORTHERN AFRICA | 273008 | 266000 | 1.0 | 0.42 | 0.0381 | 35.507 | 9689.823 | 82.8383 | ... | 0.02 | 0.00 | 99.98 | 1.0 | 22.1147 | 9.2413 | 0.1508 | 0.2827 | 0.4000 | ESH |
| 224 | Yemen | NEAR EAST | 21456188 | 527970 | 40.6 | 0.36 | 0.0000 | 61.500 | 800.000 | 50.2000 | ... | 2.78 | 0.24 | 96.98 | 1.0 | 42.8900 | 8.3000 | 0.1350 | 0.4720 | 0.3930 | YEM |
| 225 | Zambia | SUB-SAHARAN AFRICA | 11502010 | 752614 | 15.3 | 0.00 | 0.0000 | 88.290 | 800.000 | 80.6000 | ... | 7.08 | 0.03 | 92.90 | 2.0 | 41.0000 | 19.9300 | 0.2200 | 0.2900 | 0.4890 | ZMB |
| 226 | Zimbabwe | SUB-SAHARAN AFRICA | 12236805 | 390580 | 31.3 | 0.00 | 0.0000 | 67.690 | 1900.000 | 90.7000 | ... | 8.32 | 0.34 | 91.34 | 2.0 | 28.0100 | 21.8400 | 0.1790 | 0.2430 | 0.5790 | ZWE |
227 rows × 21 columns
df.describe()
| Population | Area_land | Population_density | Coastline | Net migration | Infant_mortality | GDP_PCP | Literacy | Phones | Arable | Crops | Other_factors | Climate | Birthrate | Deathrate | Agriculture | Industry | Service | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2.270000e+02 | 2.270000e+02 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 | 227.000000 |
| mean | 2.874028e+07 | 5.982270e+05 | 379.047137 | 21.165330 | 0.038125 | 35.506964 | 9689.823009 | 82.838278 | 236.061435 | 13.797111 | 4.564222 | 81.638311 | 2.139024 | 22.114732 | 9.241345 | 0.150844 | 0.282711 | 0.565283 |
| std | 1.178913e+08 | 1.790282e+06 | 1660.185825 | 72.286863 | 4.856710 | 35.154225 | 10026.881258 | 18.920483 | 225.965194 | 12.982573 | 8.324390 | 16.069256 | 0.664484 | 11.102286 | 4.945670 | 0.141843 | 0.133288 | 0.160243 |
| min | 7.026000e+03 | 2.000000e+00 | 0.000000 | 0.000000 | -20.990000 | 2.290000 | 500.000000 | 17.600000 | 0.200000 | 0.000000 | 0.000000 | 33.330000 | 1.000000 | 7.290000 | 2.290000 | 0.000000 | 0.020000 | 0.062000 |
| 25% | 4.376240e+05 | 4.647500e+03 | 29.150000 | 0.100000 | -0.905000 | 8.215000 | 1900.000000 | 76.400000 | 38.500000 | 3.380000 | 0.190000 | 71.985000 | 2.000000 | 12.725000 | 5.975000 | 0.040000 | 0.200000 | 0.443500 |
| 50% | 4.786994e+06 | 8.660000e+04 | 78.800000 | 0.730000 | 0.000000 | 21.050000 | 5600.000000 | 90.300000 | 181.600000 | 10.530000 | 1.080000 | 85.380000 | 2.000000 | 18.900000 | 8.170000 | 0.116000 | 0.282000 | 0.565283 |
| 75% | 1.749777e+07 | 4.418110e+05 | 190.150000 | 10.345000 | 0.980000 | 55.335000 | 15700.000000 | 97.800000 | 382.900000 | 20.000000 | 4.557111 | 95.230000 | 2.319512 | 29.770000 | 10.580000 | 0.211000 | 0.335000 | 0.671500 |
| max | 1.313974e+09 | 1.707520e+07 | 16271.500000 | 870.660000 | 23.060000 | 191.190000 | 55100.000000 | 100.000000 | 1035.600000 | 62.110000 | 50.680000 | 100.000000 | 4.000000 | 50.730000 | 29.740000 | 0.769000 | 0.906000 | 0.954000 |
#df['Literacy']= df.apply(lambda x: x.Literacy * 100,axis=1)
#df['Crops']= df.apply(lambda x: x.Crops * 100,axis=1)
#df['Others_factors']= df.apply(lambda x: x.Other_factors * 100,axis=1)
#df['Arable']= df.apply(lambda x: x.Arable * 100,axis=1)
# Pairwise relationships between the columns of df2.
# sns.pairplot creates its own figure, so a preceding plt.figure() call only
# produced an extra, empty figure; it has been removed.
sns.pairplot(df2)
plt.show()
<Figure size 2160x1440 with 0 Axes>
# Correlation heatmap of the numeric columns.
plt.figure(figsize=(16, 12))
# The slice still contains the text column 'country_code'; keep numeric
# columns only, because DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0 (older versions dropped such columns silently).
numeric_columns = df.iloc[:, 2:].select_dtypes(include='number')
sns.heatmap(data=numeric_columns.corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.show()
# Countries with the ten highest net-migration values (ties are kept).
top_net_migration = df.nlargest(10, ['Net migration'], keep='all')
# Original (misleading) name kept as an alias for any code that still uses it.
top_deathrates = top_net_migration
# Label each bar with the value it represents (net migration). The previous
# text='Deathrate' printed an unrelated column above the bars, unlike the
# population charts in this file, where text matches the plotted column.
fig = px.bar(top_net_migration, y='Net migration', x='Country',
             title='TOP TEN NET MIGRATION COUNTRIES', text='Net migration',
             color_discrete_sequence=px.colors.sequential.Magma_r)
fig.update_traces(texttemplate="%{text:.2s}", textposition='outside')
# The 20 most and 20 least populated countries, plus both sets combined.
top_population_coun = df.nlargest(20, 'Population')
last_population_coun = df.nsmallest(20, 'Population')
top_last_population_coun = pd.concat([top_population_coun, last_population_coun])

# Bar chart of the most populated countries.
fig = px.bar(top_population_coun, y='Population', x='Country', text='Population',
             title='THE TOP POPULATED COUNTRIES',
             color_discrete_sequence=px.colors.sequential.Darkmint,
             template='plotly_dark')
fig.update_traces(texttemplate="%{text:.2s}", textposition='outside')
fig.update_layout(uniformtext_minsize=8, xaxis_tickangle=-45)
fig

# Bar chart of the least populated countries.
fig = px.bar(last_population_coun, y='Population', x='Country', text='Population',
             title='THE LAST POPULATED COUNTRIES')
# RGB channels are limited to 0-255; the former values 267 and 408 were out of
# range and are clamped here to the nearest valid value (255).
fig.update_traces(texttemplate="%{text:.2s}", textposition='outside',
                  marker_color='rgb(255,6,225)',
                  marker_line_color='rgb(200,255,127)',
                  marker_line_width=1.5, opacity=0.6)
fig.update_layout(uniformtext_minsize=8, xaxis_tickangle=-45)
fig
# Bar chart of 'Coastline' by region on a log-scaled y axis.
px.bar(df, x='Region',y='Coastline',title='ANALTSIS ON THE COASTLINE COUNTRIES ACCORDING TO REGION',
hover_name='Country',color_discrete_sequence=px.colors.sequential.Jet,log_y=True,)
# Bubble map: countries are located through 'country_code' and the marker size
# encodes the 'Arable' value.
fig = px.scatter_geo(df, locations='country_code',hover_name='Country',
size='Arable', projection='stereographic',title='THE ARABLE LAND WORLDWIDE COUNTRY VISUAL',
color_discrete_sequence=px.colors.sequential.gray,template='plotly',)
fig
# Choropleth of 'Arable', restricted to the 'europe' scope.
fig= px.choropleth(df,locations='country_code',color='Arable',color_continuous_scale=px.colors.sequential.Cividis_r,
scope='europe',title='further analysis on the europe arable land',hover_name='Country')
fig
# Sunburst Region -> Country, sized by population and coloured by land area;
# the colour midpoint is the population-weighted average of 'Area_land'.
fig = px.sunburst(df, path=['Region', 'Country'], values='Population',
color='Area_land', hover_data=['country_code'],
color_continuous_scale='Oryel',
color_continuous_midpoint=np.average(df['Area_land'], weights=df['Population']),
title='Population and Area (square miles)(640arce==1area[square miles])')
fig.show()
# World choropleth of the 'Climate' column.
# The colour range follows the observed values; the former fixed range
# (0, 12) is far wider than the data (1 to 4 in this dataset), which pushed
# every country into the lowest part of the colour scale.
climate_range = (df['Climate'].min(), df['Climate'].max())
fig = px.choropleth(df, locations='country_code', color='Climate',
                    color_continuous_scale='oxy', range_color=climate_range,
                    hover_name='Country',
                    title='GLOBAL CLIMATE(%) ANALYSIS', projection='mollweide')
fig
#df3.rename({'df.Infant_mortality(per 1000 births)':'infant_mortality'},inplace=True)
# plotly.tools.make_subplots is deprecated; use the plotly.subplots version.
from plotly.subplots import make_subplots

# Infant mortality against birthrate (left panel) and deathrate (right panel).
# No xaxis/yaxis is set on the traces: add_trace(row=..., col=...) binds each
# trace to the axes of its subplot, so the earlier hand-written (and mutually
# inconsistent) axis ids had no effect.
trace0 = go.Scatter(x=df.Infant_mortality,
                    y=df.Birthrate,
                    mode="markers",
                    name="Birthrate(%)",
                    marker=dict(size=12, color="rgba(255, 70, 0, 0.9)"))
trace1 = go.Scatter(x=df.Infant_mortality,
                    y=df.Deathrate,
                    name="Deathrate(%)",
                    mode="markers",
                    marker=dict(size=12, color="rgba(0, 190, 255, 0.9)"))
fig = make_subplots(rows=1,
                    cols=2,
                    subplot_titles=("birthrate", "deathrate"))
# add_trace with row/col replaces the deprecated append_trace.
fig.add_trace(trace0, row=1, col=1)
fig.add_trace(trace1, row=1, col=2)
# Title corrected: the plotted column is infant mortality, not "morality".
fig.update_layout(title="Correlation between infant_mortality ,birthrate and Deathrate")
iplot(fig)
C:\Users\Akara\anaconda3\lib\site-packages\plotly\tools.py:460: DeprecationWarning: plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead
#dir(px.colors.sequential)
#dir(px.colors.qualitative)
# Pie chart: 'Infant_mortality' values aggregated per region.
fig = px.pie(df, values='Infant_mortality', names='Region',color_discrete_sequence=px.colors.sequential.BuGn,
template='plotly_dark',
title='piechart depicting(the number of deaths per 1,000 live births of children under the age of one year)in the region')
# Draw the slice labels inside the slices.
fig.update_traces(textposition='inside')
fig.update_layout(uniformtext_minsize=12, uniformtext_mode=False,)
fig.show()
# Donut chart (pie with a hole of 0.5): 'Deathrate' values aggregated per region.
px.pie(df,values='Deathrate',names='Region',hole=0.5,color_discrete_sequence=px.colors.sequential.Burgyl,
template='plotly_dark',title='A DONUT CHART DEATHRATE ANALYSIS')
# Box plot of GDP per capita; hovering a point shows the country name.
px.box(df['GDP_PCP'],hover_name=df['Country'] ,template='plotly_dark',title='A boxplot on the GDP',
color_discrete_sequence=px.colors.sequential.Burgyl)
# Countries with the 15 highest (ties kept) and 15 lowest GDP per capita.
high_GDP = df.nlargest(15, 'GDP_PCP', keep='all')
low_GDP = df.nsmallest(15, 'GDP_PCP')
# text_auto expects True or a number-format string (e.g. '.2s'), not a column
# name; True labels every bar with its plotted value.
# NOTE(review): the title says "TOP TEN" while 15 rows are selected - confirm
# which one is intended.
px.bar(high_GDP, y='Country', x='GDP_PCP',
       color_discrete_sequence=px.colors.sequential.Rainbow, template='plotly',
       text_auto=True, orientation='h',
       title='A BAR CHART ON THE TOP TEN GDP COUNTRIES')
px.bar(low_GDP, y='Country', x='GDP_PCP',
       color_discrete_sequence=px.colors.sequential.Cividis, template='plotly',
       text_auto=True, orientation='h',
       title='A BAR CHART ON LAST GDP COUNTRIES')
# Industry
# Largest and smallest 'Industry' value in the dataset.
g=df['Industry'].max()
f=df['Industry'].min()
# The 20 rows with the highest 'Industry' value.
high_con_indu =df.nlargest(20,'Industry')
# Scatter of 'Industry' per country on a log-scaled y axis (marker size also
# encodes 'Industry'), with a marginal box plot along x.
fig = px.scatter(df ,x='Country',y='Industry',size='Industry',color_discrete_sequence=px.colors.sequential.Viridis,
log_y=True,range_y=[0.01,2],marginal_x='box',title='INDUSTRIES ANALYSIS ON COUNTRIES',width=1000,
height=700)
fig.show()
# Save the chart to an HTML file.
fig.write_html('C://Users/Akara/Downloads/industries.html')
# Service
#df['Service'].max()
# Smallest 'Service' value in the dataset (displayed only).
df['Service'].min()
# Area chart of 'Service' per country, with point markers.
px.area(df,y='Service',x='Country',hover_name='Country',color_discrete_sequence=px.colors.sequential.gray_r,
markers=True,template='plotly_dark',title='A SERVICE AREA CHART BY COUNTRIES')
# Literacy
# Histogram of 'Literacy' per country with a marginal violin plot; the y axis
# is limited to 20-100.
px.histogram(df, x='Country',y='Literacy',height=600,width=1000,marginal='violin',
color_discrete_sequence=px.colors.sequential.Sunsetdark,template='plotly_dark',
title='LITERACY ANALYSIS ON COUNTRIES',range_y=[20,100])
# Agriculture
# Violin plot of 'Agriculture' with an inner box plot and every point drawn.
px.violin(df,y='Agriculture',box=True,points='all',color_discrete_sequence=px.colors.sequential.Plasma_r,
template='plotly_dark',title='A VIOLIN ON AGRICULTURE BASED ON COUNTRIES',hover_name='Country')
# Area chart of population density per country on a log-scaled y axis.
px.area(df,x='Country',y='Population_density',log_y=True,markers=True,
color_discrete_sequence=px.colors.sequential.Aggrnyl,title='A AREA CHART ON THE POPULATION DENSITY')
# Choropleth of population density restricted to the 'africa' scope; the
# colour scale is limited to the range 0-51.
fig=px.choropleth(df,locations='country_code',color='Population_density',basemap_visible=True,
color_continuous_scale=px.colors.sequential.solar,projection='robinson',scope='africa',
range_color=[0,51],title='A AFRICA CHOROPLETH MAP ON THE POPULATION DENSITY')
fig.show()
# Correlation of every numeric column with GDP per capita.
# df also holds text columns (Country, Region, country_code); restrict to
# numeric columns first, because DataFrame.corr() raises on non-numeric data
# in pandas >= 2.0.
GDP = df.select_dtypes(include='number').corr()['GDP_PCP']
GDP_CORR = pd.DataFrame(GDP)
# Shade the table from light to green according to the value.
cm1 = sns.light_palette('green', as_cmap=True)
GDP_CORR.style.background_gradient(cmap=cm1)
| GDP_PCP | |
|---|---|
| Population | -0.039319 |
| Area_land | 0.072179 |
| Population_density | 0.195751 |
| Coastline | 0.049109 |
| Net migration | 0.381969 |
| Infant_mortality | -0.600002 |
| GDP_PCP | 1.000000 |
| Literacy | 0.497963 |
| Phones | 0.830549 |
| Arable | 0.019643 |
| Crops | -0.218330 |
| Other_factors | 0.097215 |
| Climate | 0.302404 |
| Birthrate | -0.648808 |
| Deathrate | -0.201148 |
| Agriculture | -0.570735 |
| Industry | -0.027935 |
| Service | 0.529995 |
# Machine-learning frame: df without the columns listed below, with GDP per
# capita re-attached as the last column under a new name.
gdp_per_capita = df['GDP_PCP']
excluded_columns = [
    'Country', 'Region', 'Population', 'Area_land', 'Birthrate', 'Deathrate',
    'Agriculture', 'Industry', 'country_code', 'Coastline', 'Infant_mortality',
    'Arable', 'Crops', 'Other_factors', 'GDP_PCP', 'Climate',
]
df3 = pd.DataFrame(df).drop(columns=excluded_columns)
df3['GDP_per_capita($)'] = gdp_per_capita
df3
| Population_density | Net migration | Literacy | Phones | Service | GDP_per_capita($) | |
|---|---|---|---|---|---|---|
| 0 | 48.0 | 23.060000 | 36.000000 | 3.200000 | 0.380000 | 700.000000 |
| 1 | 124.6 | -4.930000 | 86.500000 | 71.200000 | 0.579000 | 4500.000000 |
| 2 | 13.8 | -0.390000 | 70.000000 | 78.100000 | 0.298000 | 6000.000000 |
| 3 | 290.4 | -20.710000 | 97.000000 | 259.500000 | 0.565283 | 8000.000000 |
| 4 | 152.1 | 6.600000 | 100.000000 | 497.200000 | 0.565283 | 19000.000000 |
| ... | ... | ... | ... | ... | ... | ... |
| 222 | 419.9 | 2.980000 | 82.838278 | 145.200000 | 0.630000 | 800.000000 |
| 223 | 1.0 | 0.038125 | 82.838278 | 236.061435 | 0.400000 | 9689.823009 |
| 224 | 40.6 | 0.000000 | 50.200000 | 37.200000 | 0.393000 | 800.000000 |
| 225 | 15.3 | 0.000000 | 80.600000 | 8.200000 | 0.489000 | 800.000000 |
| 226 | 31.3 | 0.000000 | 90.700000 | 26.800000 | 0.579000 | 1900.000000 |
227 rows × 6 columns
# Simple linear regression: predict GDP per capita from the 'Phones' column.
# The double brackets keep X and y two-dimensional (one column each).
X = df[['Phones']].to_numpy()
y = df[['GDP_PCP']].to_numpy()
# Hold out 30 % of the rows as a test set; random_state fixes the split.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.3,random_state=0)
# Fit the model on the training set only.
from sklearn.linear_model import LinearRegression
ML= LinearRegression()
ML.fit(X_train,y_train)
# Predict on the held-out rows.
y_pred =ML.predict(X_test)
from sklearn.metrics import r2_score
# R^2 on the test set, scaled to a percentage.
r2_score(y_test,y_pred)*100
73.10225063724188
import statsmodels.formula.api as smpi
# OLS fit through the formula API. In 'Phones~GDP_PCP' the left-hand side
# (Phones) is the dependent variable and GDP_PCP is the regressor.
# NOTE(review): the scikit-learn model earlier in this file predicts GDP_PCP
# from Phones, i.e. the opposite direction - confirm this is intended.
zl=smpi.ols(formula='Phones~GDP_PCP',data=df).fit()
zl.summary()
| Dep. Variable: | Phones | R-squared: | 0.690 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.688 |
| Method: | Least Squares | F-statistic: | 500.4 |
| Date: | Mon, 01 Aug 2022 | Prob (F-statistic): | 4.10e-59 |
| Time: | 12:23:19 | Log-Likelihood: | -1419.2 |
| No. Observations: | 227 | AIC: | 2842. |
| Df Residuals: | 225 | BIC: | 2849. |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 54.6951 | 11.654 | 4.693 | 0.000 | 31.730 | 77.661 |
| GDP_PCP | 0.0187 | 0.001 | 22.369 | 0.000 | 0.017 | 0.020 |
| Omnibus: | 53.457 | Durbin-Watson: | 1.795 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 205.452 |
| Skew: | 0.890 | Prob(JB): | 2.44e-45 |
| Kurtosis: | 7.307 | Cond. No. | 1.94e+04 |
#ML.predict([[]])
#linear multiple regression
# Correlation of every numeric column with infant mortality.
# df also holds text columns (Country, Region, country_code); restrict to
# numeric columns first, because DataFrame.corr() raises on non-numeric data
# in pandas >= 2.0.
IFM = df.select_dtypes(include='number').corr()['Infant_mortality']
IFM_CORR = pd.DataFrame(IFM)
# Shade the table from light to orange according to the value.
cm = sns.light_palette("orange", as_cmap=True)
IFM_CORR.style.background_gradient(cmap=cm)
| Infant_mortality | |
|---|---|
| Population | 0.022994 |
| Area_land | -0.007159 |
| Population_density | -0.144200 |
| Coastline | -0.136465 |
| Net migration | -0.025015 |
| Infant_mortality | 1.000000 |
| GDP_PCP | -0.600002 |
| Literacy | -0.745543 |
| Phones | -0.669715 |
| Arable | -0.113094 |
| Crops | -0.061159 |
| Other_factors | 0.123072 |
| Climate | -0.342848 |
| Birthrate | 0.844968 |
| Deathrate | 0.655734 |
| Agriculture | 0.697303 |
| Industry | 0.003510 |
| Service | -0.618623 |
#y = mx +c
# Generally: y[i] = alpha + (beta_1 * x_1[i]) + (beta_2 * x_2[i]) + (beta_3 * x_3[i]) + error
# Model: y_hat[i] = alpha_hat + (beta_1_hat * x_1[i]) + (beta_2_hat * x_2[i]) + (beta_3_hat * x_3[i])
#alpha_hat = y_intercept = C
# Simple linear regression of infant mortality on birthrate, fitted on all
# rows (no train/test split here).
X = df[['Birthrate']].to_numpy()
y= df[['Infant_mortality']].to_numpy()
tl = LinearRegression()
tl.fit(X,y)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
# Predictions for the same rows the model was fitted on, drawn as a red line
# over the scatter of the observed values.
y_pred=tl.predict(X)
plt.scatter(X,y)
plt.plot(X,y_pred ,color='r')
[<matplotlib.lines.Line2D at 0x1e269e3d3d0>]
print(tl.coef_)
[[2.67550327]]
print(tl.intercept_)
[-23.66107379]
from sklearn.metrics import r2_score
r2_score(y,y_pred)
0.7139712734799849
# machine learning predictions on infant mortality
# Feature matrix with three predictors (birthrate, agriculture, deathrate)
# and a single-column target (infant mortality).
X=df[['Birthrate','Agriculture','Deathrate']].to_numpy()
y=df[['Infant_mortality']].to_numpy()
# Display the feature matrix.
X
array([[4.66000000e+01, 3.80000000e-01, 2.03400000e+01],
[1.51100000e+01, 2.32000000e-01, 5.22000000e+00],
[1.71400000e+01, 1.01000000e-01, 4.61000000e+00],
[2.24600000e+01, 1.50844340e-01, 3.27000000e+00],
[8.71000000e+00, 1.50844340e-01, 6.25000000e+00],
[4.51100000e+01, 9.60000000e-02, 2.42000000e+01],
[1.41700000e+01, 4.00000000e-02, 5.34000000e+00],
[1.69300000e+01, 3.80000000e-02, 5.37000000e+00],
[1.67300000e+01, 9.50000000e-02, 7.55000000e+00],
[1.20700000e+01, 2.39000000e-01, 8.23000000e+00],
[1.10300000e+01, 4.00000000e-03, 6.68000000e+00],
[1.21400000e+01, 3.80000000e-02, 7.51000000e+00],
[8.74000000e+00, 1.80000000e-02, 9.76000000e+00],
[2.07400000e+01, 1.41000000e-01, 9.75000000e+00],
[1.75700000e+01, 3.00000000e-02, 9.05000000e+00],
[1.78000000e+01, 5.00000000e-03, 4.14000000e+00],
[2.98000000e+01, 1.99000000e-01, 8.27000000e+00],
[1.27100000e+01, 6.00000000e-02, 8.67000000e+00],
[1.11600000e+01, 9.30000000e-02, 1.40200000e+01],
[1.03800000e+01, 1.00000000e-02, 1.02700000e+01],
[2.88400000e+01, 1.42000000e-01, 5.72000000e+00],
[3.88500000e+01, 3.16000000e-01, 1.22200000e+01],
[1.14000000e+01, 1.00000000e-02, 7.74000000e+00],
[3.36500000e+01, 2.58000000e-01, 1.27000000e+01],
[2.33000000e+01, 1.28000000e-01, 7.53000000e+00],
[8.77000000e+00, 1.42000000e-01, 8.27000000e+00],
[2.30800000e+01, 2.40000000e-02, 2.95000000e+01],
[1.65600000e+01, 8.40000000e-02, 6.17000000e+00],
[1.48900000e+01, 1.80000000e-02, 4.42000000e+00],
[1.87900000e+01, 3.60000000e-02, 3.45000000e+00],
[9.65000000e+00, 9.30000000e-02, 1.42700000e+01],
[4.56200000e+01, 3.22000000e-01, 1.56000000e+01],
[1.79100000e+01, 5.64000000e-01, 9.83000000e+00],
[4.22200000e+01, 4.63000000e-01, 1.34600000e+01],
[2.69000000e+01, 3.50000000e-01, 9.06000000e+00],
[3.38900000e+01, 4.48000000e-01, 1.34700000e+01],
[1.07800000e+01, 2.20000000e-02, 7.80000000e+00],
[2.48700000e+01, 1.21000000e-01, 6.55000000e+00],
[1.27400000e+01, 1.40000000e-02, 4.89000000e+00],
[3.39100000e+01, 5.50000000e-01, 1.86500000e+01],
[4.57300000e+01, 3.35000000e-01, 1.63800000e+01],
[1.52300000e+01, 6.00000000e-02, 5.81000000e+00],
[1.32500000e+01, 1.25000000e-01, 6.97000000e+00],
[2.04800000e+01, 1.25000000e-01, 5.58000000e+00],
[3.69300000e+01, 4.00000000e-01, 8.20000000e+00],
[4.36900000e+01, 5.50000000e-01, 1.32700000e+01],
[4.25700000e+01, 6.20000000e-02, 1.29300000e+01],
[2.10000000e+01, 1.51000000e-01, 9.24134529e+00],
[1.83200000e+01, 8.80000000e-02, 4.36000000e+00],
[3.51100000e+01, 2.79000000e-01, 1.48400000e+01],
[9.61000000e+00, 7.00000000e-02, 1.14800000e+01],
[1.18900000e+01, 5.50000000e-02, 7.22000000e+00],
[1.25600000e+01, 3.70000000e-02, 7.68000000e+00],
[9.02000000e+00, 3.40000000e-02, 1.05900000e+01],
[1.11300000e+01, 1.80000000e-02, 1.03600000e+01],
[3.95300000e+01, 1.79000000e-01, 1.93100000e+01],
[1.52700000e+01, 1.77000000e-01, 6.73000000e+00],
[2.32200000e+01, 1.12000000e-01, 5.73000000e+00],
[2.69900000e+01, 8.50000000e-02, 6.24000000e+00],
[2.22900000e+01, 7.00000000e-02, 4.23000000e+00],
[2.29400000e+01, 1.49000000e-01, 5.23000000e+00],
[2.66100000e+01, 9.90000000e-02, 5.78000000e+00],
[3.55900000e+01, 3.00000000e-02, 1.50600000e+01],
[3.43300000e+01, 1.02000000e-01, 9.60000000e+00],
[1.00400000e+01, 4.00000000e-02, 1.32500000e+01],
[3.79800000e+01, 4.75000000e-01, 1.48600000e+01],
[1.40500000e+01, 2.70000000e-01, 8.70000000e+00],
[2.25500000e+01, 8.90000000e-02, 5.65000000e+00],
[1.04500000e+01, 2.80000000e-02, 9.86000000e+00],
[1.19900000e+01, 2.20000000e-02, 9.14000000e+00],
[2.04600000e+01, 6.60000000e-02, 4.88000000e+00],
[1.66800000e+01, 3.10000000e-02, 4.69000000e+00],
[3.61600000e+01, 6.10000000e-02, 1.22500000e+01],
[3.93700000e+01, 3.08000000e-01, 1.22500000e+01],
[3.94500000e+01, 3.00000000e-02, 3.80000000e+00],
[1.04100000e+01, 1.72000000e-01, 9.23000000e+00],
[8.25000000e+00, 9.00000000e-03, 1.06200000e+01],
[3.05200000e+01, 3.66000000e-01, 9.72000000e+00],
[1.07400000e+01, 1.50844340e-01, 9.31000000e+00],
[9.68000000e+00, 5.40000000e-02, 1.02400000e+01],
[1.59300000e+01, 1.50844340e-01, 7.84000000e+00],
[2.20800000e+01, 5.40000000e-02, 6.88000000e+00],
[1.50500000e+01, 1.50000000e-01, 6.09000000e+00],
[1.87900000e+01, 1.50844340e-01, 4.48000000e+00],
[2.98800000e+01, 2.27000000e-01, 5.20000000e+00],
[8.81000000e+00, 3.00000000e-02, 1.00100000e+01],
[4.17600000e+01, 2.37000000e-01, 1.54800000e+01],
[3.72200000e+01, 6.20000000e-01, 1.65300000e+01],
[1.82800000e+01, 3.70000000e-01, 8.28000000e+00],
[3.64400000e+01, 2.80000000e-01, 1.21700000e+01],
[2.82400000e+01, 1.39000000e-01, 5.28000000e+00],
[7.29000000e+00, 1.00000000e-03, 6.29000000e+00],
[9.72000000e+00, 3.70000000e-02, 1.31100000e+01],
[1.36400000e+01, 8.60000000e-02, 6.72000000e+00],
[2.20100000e+01, 1.86000000e-01, 8.18000000e+00],
[2.03400000e+01, 1.34000000e-01, 6.25000000e+00],
[1.70000000e+01, 1.16000000e-01, 5.55000000e+00],
[3.19800000e+01, 7.30000000e-02, 5.37000000e+00],
[1.44500000e+01, 5.00000000e-02, 7.82000000e+00],
[1.10500000e+01, 1.00000000e-02, 1.11900000e+01],
[1.79700000e+01, 2.60000000e-02, 6.18000000e+00],
[8.72000000e+00, 2.10000000e-02, 1.04000000e+01],
[2.08200000e+01, 4.90000000e-02, 6.52000000e+00],
[9.37000000e+00, 1.70000000e-02, 9.16000000e+00],
[9.30000000e+00, 5.00000000e-02, 9.28000000e+00],
[2.12500000e+01, 3.30000000e-02, 2.65000000e+00],
[1.60000000e+01, 6.70000000e-02, 9.42000000e+00],
[3.97200000e+01, 1.63000000e-01, 1.40200000e+01],
[3.06500000e+01, 8.90000000e-02, 8.26000000e+00],
[1.55400000e+01, 3.00000000e-01, 7.13000000e+00],
[1.00000000e+01, 3.30000000e-02, 5.85000000e+00],
[2.19400000e+01, 4.00000000e-03, 2.41000000e+00],
[2.28000000e+01, 3.53000000e-01, 7.08000000e+00],
[3.54900000e+01, 4.55000000e-01, 1.15500000e+01],
[9.24000000e+00, 4.00000000e-02, 1.36600000e+01],
[1.85200000e+01, 1.20000000e-01, 6.21000000e+00],
[2.47500000e+01, 1.63000000e-01, 2.87100000e+01],
[4.47700000e+01, 7.69000000e-01, 2.31000000e+01],
[2.64900000e+01, 7.60000000e-02, 3.48000000e+00],
[1.02100000e+01, 6.00000000e-02, 7.18000000e+00],
[8.75000000e+00, 5.50000000e-02, 1.09800000e+01],
[1.19400000e+01, 1.00000000e-02, 8.41000000e+00],
[8.48000000e+00, 1.00000000e-03, 4.47000000e+00],
[1.20200000e+01, 1.18000000e-01, 8.77000000e+00],
[4.14100000e+01, 2.76000000e-01, 1.11100000e+01],
[4.31300000e+01, 3.42000000e-01, 1.93300000e+01],
[2.28600000e+01, 8.40000000e-02, 5.05000000e+00],
[3.48100000e+01, 2.00000000e-01, 7.06000000e+00],
[4.98200000e+01, 4.50000000e-01, 1.68900000e+01],
[1.02200000e+01, 3.00000000e-02, 8.10000000e+00],
[3.30500000e+01, 3.17000000e-01, 4.78000000e+00],
[1.37400000e+01, 6.00000000e-02, 6.48000000e+00],
[4.09900000e+01, 2.50000000e-01, 1.21600000e+01],
[1.54300000e+01, 5.90000000e-02, 6.86000000e+00],
[4.09500000e+01, 1.50844340e-01, 7.70000000e+00],
[2.06900000e+01, 3.80000000e-02, 4.74000000e+00],
[2.46800000e+01, 2.89000000e-01, 4.75000000e+00],
[1.57000000e+01, 2.13000000e-01, 1.26400000e+01],
[9.19000000e+00, 1.70000000e-01, 1.29100000e+01],
[2.15900000e+01, 2.06000000e-01, 6.95000000e+00],
[1.75900000e+01, 1.50844340e-01, 7.10000000e+00],
[2.19800000e+01, 2.17000000e-01, 5.58000000e+00],
[3.51800000e+01, 2.62000000e-01, 2.13500000e+01],
[2.43200000e+01, 9.70000000e-02, 1.88600000e+01],
[2.47600000e+01, 1.50844340e-01, 6.70000000e+00],
[3.09800000e+01, 3.80000000e-01, 9.31000000e+00],
[1.09000000e+01, 2.10000000e-02, 8.68000000e+00],
[1.47800000e+01, 1.00000000e-02, 6.45000000e+00],
[1.81100000e+01, 1.50000000e-01, 5.69000000e+00],
[1.37600000e+01, 4.30000000e-02, 7.53000000e+00],
[2.45100000e+01, 1.65000000e-01, 4.45000000e+00],
[5.07300000e+01, 3.90000000e-01, 2.09100000e+01],
[4.04300000e+01, 2.69000000e-01, 1.69400000e+01],
[1.94300000e+01, 1.50844340e-01, 2.29000000e+00],
[1.14600000e+01, 2.10000000e-02, 9.40000000e+00],
[3.62400000e+01, 2.70000000e-02, 3.81000000e+00],
[2.97400000e+01, 2.16000000e-01, 8.23000000e+00],
[1.80300000e+01, 6.20000000e-02, 6.80000000e+00],
[2.17400000e+01, 6.80000000e-02, 5.36000000e+00],
[2.93600000e+01, 3.53000000e-01, 7.25000000e+00],
[2.91000000e+01, 2.24000000e-01, 4.49000000e+00],
[2.04800000e+01, 8.00000000e-02, 6.23000000e+00],
[2.48900000e+01, 1.44000000e-01, 5.41000000e+00],
[9.85000000e+00, 5.00000000e-02, 9.89000000e+00],
[1.07200000e+01, 5.30000000e-02, 1.05000000e+01],
[1.27700000e+01, 1.00000000e-02, 7.65000000e+00],
[1.55600000e+01, 2.00000000e-03, 4.72000000e+00],
[1.89000000e+01, 8.00000000e-02, 5.49000000e+00],
[1.07000000e+01, 1.01000000e-01, 1.17700000e+01],
[9.95000000e+00, 5.40000000e-02, 1.46500000e+01],
[4.03700000e+01, 4.01000000e-01, 1.60900000e+01],
[1.21300000e+01, 1.50844340e-01, 6.53000000e+00],
[1.80200000e+01, 3.50000000e-02, 8.33000000e+00],
[1.96800000e+01, 7.00000000e-02, 5.08000000e+00],
[1.35200000e+01, 1.50844340e-01, 6.83000000e+00],
[1.61800000e+01, 1.00000000e-01, 5.98000000e+00],
[1.64300000e+01, 1.14000000e-01, 6.62000000e+00],
[1.00200000e+01, 1.50844340e-01, 8.17000000e+00],
[4.02500000e+01, 1.67000000e-01, 6.47000000e+00],
[2.93400000e+01, 3.30000000e-02, 2.58000000e+00],
[3.27800000e+01, 1.72000000e-01, 9.42000000e+00],
[2.21147321e+01, 1.66000000e-01, 9.24134529e+00],
[1.60300000e+01, 3.20000000e-02, 6.29000000e+00],
[4.57600000e+01, 4.90000000e-01, 2.30300000e+01],
[9.34000000e+00, 0.00000000e+00, 4.28000000e+00],
[1.06500000e+01, 3.50000000e-02, 9.45000000e+00],
[8.98000000e+00, 2.80000000e-02, 1.03100000e+01],
[3.00100000e+01, 4.20000000e-01, 3.92000000e+00],
[4.51300000e+01, 6.50000000e-01, 1.66300000e+01],
[1.82000000e+01, 2.50000000e-02, 2.20000000e+01],
[1.00600000e+01, 4.00000000e-02, 9.72000000e+00],
[1.55100000e+01, 1.78000000e-01, 6.52000000e+00],
[3.45300000e+01, 3.87000000e-01, 8.97000000e+00],
[1.80200000e+01, 1.30000000e-01, 7.27000000e+00],
[2.74100000e+01, 1.19000000e-01, 2.97400000e+01],
[1.02700000e+01, 1.10000000e-02, 1.03100000e+01],
[9.71000000e+00, 1.50000000e-02, 8.49000000e+00],
[2.77600000e+01, 2.49000000e-01, 4.81000000e+00],
[1.25600000e+01, 1.80000000e-02, 6.48000000e+00],
[3.26500000e+01, 2.34000000e-01, 8.25000000e+00],
[3.77100000e+01, 4.32000000e-01, 1.63900000e+01],
[1.38700000e+01, 9.90000000e-02, 7.04000000e+00],
[3.70100000e+01, 3.95000000e-01, 9.83000000e+00],
[2.53700000e+01, 2.30000000e-01, 5.28000000e+00],
[1.29000000e+01, 7.00000000e-03, 1.05700000e+01],
[1.55200000e+01, 1.32000000e-01, 5.13000000e+00],
[1.66200000e+01, 1.17000000e-01, 5.97000000e+00],
[2.76100000e+01, 2.09000000e-01, 8.60000000e+00],
[2.18400000e+01, 1.50844340e-01, 4.21000000e+00],
[2.21800000e+01, 1.66000000e-01, 7.11000000e+00],
[4.73500000e+01, 3.11000000e-01, 1.22400000e+01],
[8.82000000e+00, 1.87000000e-01, 1.43900000e+01],
[1.89600000e+01, 4.00000000e-02, 4.40000000e+00],
[1.07100000e+01, 5.00000000e-03, 1.01300000e+01],
[1.41400000e+01, 1.00000000e-02, 8.26000000e+00],
[1.39100000e+01, 9.30000000e-02, 9.05000000e+00],
[2.63600000e+01, 3.42000000e-01, 7.84000000e+00],
[2.27200000e+01, 2.60000000e-01, 7.82000000e+00],
[1.87100000e+01, 4.00000000e-02, 4.92000000e+00],
[1.68600000e+01, 2.09000000e-01, 6.22000000e+00],
[1.39600000e+01, 1.00000000e-02, 6.43000000e+00],
[2.21147321e+01, 1.50844340e-01, 9.24134529e+00],
[3.16700000e+01, 9.00000000e-02, 3.92000000e+00],
[2.21147321e+01, 1.50844340e-01, 9.24134529e+00],
[4.28900000e+01, 1.35000000e-01, 8.30000000e+00],
[4.10000000e+01, 2.20000000e-01, 1.99300000e+01],
[2.80100000e+01, 1.79000000e-01, 2.18400000e+01]])
# Echo the regression target. The printed array is a single column whose leading
# values (163.07, 21.52, 31.0, ...) line up with df's Infant_mortality column, so
# y is presumably that column as an (n, 1) array -- confirm against the cell
# that builds X and y.
y
array([[163.07 ],
[ 21.52 ],
[ 31. ],
[ 9.27 ],
[ 4.05 ],
[191.19 ],
[ 21.03 ],
[ 19.46 ],
[ 15.18 ],
[ 23.28 ],
[ 5.89 ],
[ 4.69 ],
[ 4.66 ],
[ 81.74 ],
[ 25.21 ],
[ 17.27 ],
[ 62.6 ],
[ 12.5 ],
[ 13.37 ],
[ 4.68 ],
[ 25.69 ],
[ 85. ],
[ 8.53 ],
[100.44 ],
[ 53.11 ],
[ 21.05 ],
[ 54.58 ],
[ 29.61 ],
[ 18.05 ],
[ 12.61 ],
[ 20.55 ],
[ 97.57 ],
[ 67.24 ],
[ 69.29 ],
[ 71.48 ],
[ 68.26 ],
[ 4.75 ],
[ 47.77 ],
[ 8.19 ],
[ 91. ],
[ 93.82 ],
[ 8.8 ],
[ 24.18 ],
[ 20.97 ],
[ 74.93 ],
[ 94.69 ],
[ 93.86 ],
[ 35.50696429],
[ 9.95 ],
[ 90.83 ],
[ 6.84 ],
[ 6.33 ],
[ 7.18 ],
[ 3.93 ],
[ 4.56 ],
[104.13 ],
[ 14.15 ],
[ 32.38 ],
[ 47.41 ],
[ 23.66 ],
[ 32.59 ],
[ 25.1 ],
[ 85.13 ],
[ 74.87 ],
[ 7.87 ],
[ 95.32 ],
[ 6.24 ],
[ 12.62 ],
[ 3.57 ],
[ 4.26 ],
[ 12.07 ],
[ 8.44 ],
[ 53.64 ],
[ 72.02 ],
[ 22.93 ],
[ 18.59 ],
[ 4.16 ],
[ 51.43 ],
[ 5.13 ],
[ 5.53 ],
[ 15.82 ],
[ 14.62 ],
[ 8.6 ],
[ 6.94 ],
[ 35.93 ],
[ 4.71 ],
[ 90.37 ],
[107.17 ],
[ 33.26 ],
[ 73.45 ],
[ 29.32 ],
[ 2.97 ],
[ 8.57 ],
[ 3.31 ],
[ 56.29 ],
[ 35.6 ],
[ 41.58 ],
[ 50.25 ],
[ 5.39 ],
[ 5.93 ],
[ 7.03 ],
[ 5.94 ],
[ 12.36 ],
[ 3.26 ],
[ 5.24 ],
[ 17.35 ],
[ 29.21 ],
[ 61.47 ],
[ 48.52 ],
[ 24.04 ],
[ 7.05 ],
[ 9.95 ],
[ 35.64 ],
[ 85.22 ],
[ 9.55 ],
[ 24.52 ],
[ 84.23 ],
[128.87 ],
[ 24.6 ],
[ 4.7 ],
[ 6.89 ],
[ 4.81 ],
[ 4.39 ],
[ 10.09 ],
[ 76.83 ],
[103.32 ],
[ 17.7 ],
[ 56.52 ],
[116.79 ],
[ 3.89 ],
[ 29.45 ],
[ 7.09 ],
[ 70.89 ],
[ 15.03 ],
[ 62.4 ],
[ 20.91 ],
[ 30.21 ],
[ 40.42 ],
[ 5.43 ],
[ 53.79 ],
[ 7.35 ],
[ 41.62 ],
[130.79 ],
[ 48.98 ],
[ 9.95 ],
[ 66.98 ],
[ 5.04 ],
[ 10.03 ],
[ 7.72 ],
[ 5.85 ],
[ 29.11 ],
[121.69 ],
[ 98.8 ],
[ 7.11 ],
[ 3.7 ],
[ 19.51 ],
[ 72.44 ],
[ 14.84 ],
[ 20.47 ],
[ 51.45 ],
[ 25.63 ],
[ 31.94 ],
[ 23.51 ],
[ 8.51 ],
[ 5.05 ],
[ 8.24 ],
[ 18.61 ],
[ 7.78 ],
[ 26.43 ],
[ 15.39 ],
[ 91.23 ],
[ 19. ],
[ 14.49 ],
[ 13.53 ],
[ 7.54 ],
[ 14.78 ],
[ 27.71 ],
[ 5.73 ],
[ 43.11 ],
[ 13.24 ],
[ 55.51 ],
[ 12.89 ],
[ 15.53 ],
[143.64 ],
[ 2.29 ],
[ 7.41 ],
[ 4.45 ],
[ 21.29 ],
[116.7 ],
[ 61.81 ],
[ 4.42 ],
[ 14.35 ],
[ 62.5 ],
[ 23.57 ],
[ 69.27 ],
[ 2.77 ],
[ 4.39 ],
[ 29.53 ],
[ 6.4 ],
[110.76 ],
[ 98.54 ],
[ 20.48 ],
[ 66.61 ],
[ 12.62 ],
[ 24.31 ],
[ 24.77 ],
[ 41.04 ],
[ 73.08 ],
[ 15.67 ],
[ 20.03 ],
[ 67.83 ],
[ 20.34 ],
[ 14.51 ],
[ 5.16 ],
[ 6.5 ],
[ 11.95 ],
[ 71.1 ],
[ 55.16 ],
[ 22.2 ],
[ 25.95 ],
[ 8.03 ],
[ 35.50696429],
[ 19.62 ],
[ 35.50696429],
[ 61.5 ],
[ 88.29 ],
[ 67.69 ]])
# Preview the first five rows of df in its current state (renamed columns plus
# the added country_code column).
df.head()
| Country | Region | Population | Area_land | Population_density | Coastline | Net migration | Infant_mortality | GDP_PCP | Literacy | ... | Arable | Crops | Other_factors | Climate | Birthrate | Deathrate | Agriculture | Industry | Service | country_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | ASIA (EX. NEAR EAST) | 31056997 | 647500 | 48.0 | 0.00 | 23.06 | 163.07 | 700.0 | 36.0 | ... | 12.13 | 0.22 | 87.65 | 1.0 | 46.60 | 20.34 | 0.380000 | 0.240000 | 0.380000 | AFG |
| 1 | Albania | EASTERN EUROPE | 3581655 | 28748 | 124.6 | 1.26 | -4.93 | 21.52 | 4500.0 | 86.5 | ... | 21.09 | 4.42 | 74.49 | 3.0 | 15.11 | 5.22 | 0.232000 | 0.188000 | 0.579000 | ALB |
| 2 | Algeria | NORTHERN AFRICA | 32930091 | 2381740 | 13.8 | 0.04 | -0.39 | 31.00 | 6000.0 | 70.0 | ... | 3.22 | 0.25 | 96.53 | 1.0 | 17.14 | 4.61 | 0.101000 | 0.600000 | 0.298000 | DZA |
| 3 | American Samoa | OCEANIA | 57794 | 199 | 290.4 | 58.29 | -20.71 | 9.27 | 8000.0 | 97.0 | ... | 10.00 | 15.00 | 75.00 | 2.0 | 22.46 | 3.27 | 0.150844 | 0.282711 | 0.565283 | ASM |
| 4 | Andorra | WESTERN EUROPE | 71201 | 468 | 152.1 | 0.00 | 6.60 | 4.05 | 19000.0 | 100.0 | ... | 2.22 | 0.00 | 97.78 | 3.0 | 8.71 | 6.25 | 0.150844 | 0.282711 | 0.565283 | AND |
5 rows × 21 columns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Fit a linear regression on the training portion only. The fit call stays the
# last expression so the notebook still echoes the fitted estimator.
ML = LinearRegression()
ML.fit(X_train, y_train)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
from sklearn.metrics import (
    r2_score,
    mean_absolute_error,
    mean_squared_error,
    accuracy_score,
)

# Run the fitted model on the held-out feature rows.
y_pred = ML.predict(X_test)

# Handy while debugging (left disabled):
# print(y_pred)
# ML.predict([[46.60, 38.000000, 20.34]])
# NOTE(review): the middle feature in X looks like a 0-1 fraction, so 38.0 in the
# example above may need to be 0.38 -- confirm the feature order and scale.

# R^2 on the test set, expressed as a percentage (echoed as the cell result).
r2_score(y_test, y_pred) * 100
75.442300383338
# Predicted-vs-actual scatter for the held-out rows; points close to the
# diagonal are well predicted. The axis labels are set first so plt.scatter
# remains the last expression of the cell and its return value is still what
# the notebook echoes.
plt.xlabel("Actual (y_test)")
plt.ylabel("Predicted (y_pred)")
plt.scatter(y_test, y_pred)
<matplotlib.collections.PathCollection at 0x1e269eb8af0>
# Report the test-set metrics, one per line: the model's score as a percentage
# (it prints the same value as the r2_score cell above), then the mean squared
# error, then the mean absolute error.
print(
    ML.score(X_test, y_test) * 100,
    mean_squared_error(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    sep="\n",
)
75.442300383338 273.7885737129123 10.774000892106692
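# ML.predict([[163]])
# NOTE(review): the feature rows printed above have three values each, so a
# single-value row like this would presumably be rejected by predict -- pass all
# three features before re-enabling.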